Getting Started

# Conventional way to import pandas
import pandas as pd 
# Check pandas version
pd.__version__
'1.1.2'
# Show version of all packages 
pd.show_versions()
INSTALLED VERSIONS
------------------
commit           : 2a7d3326dee660824a8433ffd01065f8ac37f7d6
python           : 3.7.8.final.0
python-bits      : 64
OS               : Linux
OS-release       : 5.3.0-64-generic
Version          : #58-Ubuntu SMP Fri Jul 10 19:33:51 UTC 2020
machine          : x86_64
processor        : x86_64
byteorder        : little
LC_ALL           : None
LANG             : en_US.UTF-8
LOCALE           : en_US.UTF-8

pandas           : 1.1.2
numpy            : 1.19.2
pytz             : 2020.1
dateutil         : 2.8.1
pip              : 20.2.3
setuptools       : 47.3.1
Cython           : None
pytest           : 5.4.2
hypothesis       : None
sphinx           : 2.4.4
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : 4.5.2
html5lib         : None
pymysql          : None
psycopg2         : None
jinja2           : 2.11.2
IPython          : 7.14.0
pandas_datareader: 0.9.0
bs4              : 4.9.1
bottleneck       : None
fsspec           : None
fastparquet      : None
gcsfs            : None
matplotlib       : 3.3.1
numexpr          : None
odfpy            : None
openpyxl         : None
pandas_gbq       : None
pyarrow          : None
pytables         : None
pyxlsb           : None
s3fs             : None
scipy            : 1.5.2
sqlalchemy       : 1.3.19
tables           : None
tabulate         : None
xarray           : None
xlrd             : None
xlwt             : None
numba            : 0.50.1

Creating Series

# Build a Series from a plain list (a default integer index is assigned)
s1 = pd.Series(data=[3, 6, 9, 12])
s1
0     3
1     6
2     9
3    12
dtype: int64
# Check type 
type(s1)
pandas.core.series.Series
# To see values 
s1.values
array([ 3,  6,  9, 12])
# To see index/keys 
s1.index
RangeIndex(start=0, stop=4, step=1)
# Build a Series whose index consists of explicit string labels
s2 = pd.Series(
    data=[200000, 300000, 4000000, 500000],
    index=list('ABCD'),
)
s2
A     200000
B     300000
C    4000000
D     500000
dtype: int64
s2.values
array([ 200000,  300000, 4000000,  500000])
s2.index
Index(['A', 'B', 'C', 'D'], dtype='object')
# Indexing
s2['A']
200000
# Boolean indexing
s2[s2 > 700000]
C    4000000
dtype: int64

Creating DataFrame

# Build a small country table from a dict mapping column name -> column values
data = {
    'Country': ['Belgium', 'India', 'Brazil'],
    'Capital': ['Brussels', 'New Delhi', 'Brasília'],
    'Population': [11190846, 1303171035, 207847528],
}

# The column order is taken from the dict's key order and passed explicitly
df = pd.DataFrame(data=data, columns=list(data))
df
Country Capital Population
0 Belgium Brussels 11190846
1 India New Delhi 1303171035
2 Brazil Brasília 207847528
# Check type 
type(df)
pandas.core.frame.DataFrame
# Indexing
df["Country"]
0    Belgium
1      India
2     Brazil
Name: Country, dtype: object
# or 
df.Country
0    Belgium
1      India
2     Brazil
Name: Country, dtype: object
# Boolean masks: element-wise comparisons (use df[mask] to filter rows)
df["Population"]  > 40000000
0    False
1     True
2     True
Name: Population, dtype: bool
df["Country"] == "Belgium"
0     True
1    False
2    False
Name: Country, dtype: bool
# Exact string comparison: the stored capital is 'Brasília' (with an accent),
# so the unaccented 'Brasilia' matches no row and every entry is False.
df["Capital"] == "Brasilia"
0    False
1    False
2    False
Name: Capital, dtype: bool